********************************************************************************
* 
* Merging outcomes data from ID masterfile after resolving duplicates  
* 
* 
* By: MPG. 
* Created: 2023/01/27  
********************************************************************************

clear all

*** Build the crosswalk used to merge the outcomes data together
* Load only the two identifier columns from the ID masterfile.
* Forward slashes are used as the path separator because Stata accepts
* them on Windows, macOS and Linux alike.
use mci_uniq_id mci_uniq_id9 using "$data/Processed/IDmasterfile", clear

* Keep only rows where the two identifiers differ, then remove exact repeats.
drop if mci_uniq_id == mci_uniq_id9
duplicates drop

* This file is the using side of an m:1 merge on mci_uniq_id further down,
* so it must be unique on that key. Report the duplicate counts and stop
* right here if the key is not unique (missok: a missing key is allowed as
* long as it is itself unique, which is what the merge requires).
duplicates report mci_uniq_id
isid mci_uniq_id, missok
display _N // 1003 observations when this was written

tempfile crosswalktomerge
save "`crosswalktomerge'"
 

*** Merge the crosswalk onto the outcomes data matched with CPS
* Forward slashes keep the path valid on every platform Stata runs on.
use "$data/registration_voting_outcomes_withxwalk.dta", clear

* Keep only observations that carry an identifier. missing() also excludes
* extended missing values (.a-.z), which a plain comparison against the
* system missing value would let through.
keep if !missing(mci_uniq_id)
compress

* keep(master match) discards crosswalk-only rows at merge time, and
* nogenerate avoids leaving a merge indicator that is not used afterwards.
merge m:1 mci_uniq_id using "`crosswalktomerge'", keep(master match) nogenerate

* Observations without a crosswalk entry fall back to their own identifier.
replace mci_uniq_id9 = mci_uniq_id if missing(mci_uniq_id9)

* Drop the fields that are not used in the rest of this file.
drop title last_name first_name middle_name suffix ///
	house_number house_number_suffix street_name apt_number address_line2 ///
	city state zip ///
	mail_address1 mail_address2 mail_city mail_state mail_zip ///
	party_code precinct_split_id date_last_changed custom_data1 ///
	home_phone mail_county load_date change_date address_key ///
	V1 V3 V4 V5

* Convert string-coded fields into 0/1 indicators
* NOTE(review): male, female and allegheny are 0 (not missing) when the
* source string is empty, unlike the indicators created after them, which
* are missing in that case -- confirm this is intended.
generate male   = (gender == "M")
generate female = (gender == "F")

* Voter status: missing when voter_status is empty, otherwise 0/1.
generate voter_status_active   = cond(missing(voter_status), ., voter_status == "A")
generate voter_status_inactive = cond(missing(voter_status), ., voter_status == "I")

* Party registration, one pair of indicators per dated registered_ column:
* missing when that column is empty, otherwise 0/1.
foreach stamp in 12262016 12242018 09142020 02112019 02062017 01112021 02082021 01172022 11192012 {
	local raw registered_`stamp'
	generate rep_`stamp' = cond(missing(`raw'), ., `raw' == "R")
	generate dem_`stamp' = cond(missing(`raw'), ., `raw' == "D")
}

* County flag, plus its maximum and minimum within each mci_uniq_id9
* (1 in any record / 1 in every record).
generate allegheny = (county == "ALLEGHENY")
bysort mci_uniq_id9: egen allegheny_any    = max(allegheny)
bysort mci_uniq_id9: egen allegheny_always = min(allegheny)

 
* Reduce to one row per mci_uniq_id9, taking the within-id maximum of every
* indicator. The variable list is held in one local so that the loop and
* the final keep cannot drift apart.
local outcomes ///
	rep_12262016 dem_12262016 rep_12242018 dem_12242018 ///
	rep_09142020 dem_09142020 rep_02112019 dem_02112019 ///
	rep_02062017 dem_02062017 rep_01112021 dem_01112021 ///
	rep_02082021 dem_02082021 rep_01172022 dem_01172022 ///
	rep_11192012 dem_11192012 ///
	male female ///
	registered2012 registered2016 registered2018 registered2020 ///
	voted2008 voted2010 voted2012 voted2014 voted2016 voted2018 voted2020 ///
	voter_status_active voter_status_inactive

foreach v of local outcomes {
	* Compute the group maximum into a scratch column, then let it take
	* over the original name.
	bysort mci_uniq_id9: egen `v'v2 = max(`v')
	drop `v'
	rename `v'v2 `v'
}

* Every retained column is now constant within mci_uniq_id9, so keeping the
* first row of each group loses nothing.
bysort mci_uniq_id9: drop if _n > 1

keep mci_uniq_id9 `outcomes' allegheny_always allegheny_any

* Rename each dated party indicator to stub + year, where the year is the
* last four characters of the date suffix.
foreach stamp in 12262016 12242018 09142020 02112019 02062017 01172022 11192012 {
	local year = substr("`stamp'", -4, 4) // same for rep and dem, so computed once
	foreach party in rep dem {
		rename `party'_`stamp' `party'`year'
	}
}

* Two suffixes end in 2021, so they are combined into a single 2021 column.
* max() skips a missing argument and is missing only when both are missing.
foreach party in rep dem {
	generate `party'2021 = max(`party'_01112021, `party'_02082021)
}

* Only the two dated 2021 columns per party still carry an underscore
* suffix at this point; remove them now that they have been combined.
drop dem_* rep_*

* Go from one row per mci_uniq_id9 to one row per mci_uniq_id9 and year.
reshape long rep dem registered voted, i(mci_uniq_id9) j(year)

* Forward slashes keep the path valid on every platform Stata runs on.
save "$data/Processed/Outcomes_mci", replace

